In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from scipy.io import loadmat
from scipy.special import logsumexp
%matplotlib inline
In [2]:
data = loadmat('Tut8_file1.mat')
# Unpack the experiment variables explicitly instead of locals().update(data):
# that call has undefined behavior outside module scope and hides which names
# enter the namespace.  The downstream cells use exactly these three arrays.
state = data['state']
response = data['response']
reward = data['reward']

T, p = state.shape  # T trials; p columns per record (loadmat column vectors)
alpha = .1  # learning rate for the delta rule
beta = 1.   # softmax inverse temperature

# Lookup table mapping a (state, response) pair -- each coded 1 or 2 -- onto
# a row 0..3 of the value matrix V.  Row/column 0 is unused padding.
index = np.zeros((3, 3), dtype=int)
index[1, 1] = 0
index[2, 1] = 1
index[1, 2] = 2
index[2, 2] = 3
In [3]:
def compute_V(alpha, states=None, responses=None, rewards=None, idx=None):
    """Learn one value per (state, response) pair with a delta rule.

    On each trial the value of the pair actually taken is nudged toward the
    obtained reward:  V <- V + alpha * (reward - V).  All other entries are
    carried forward unchanged.

    Parameters
    ----------
    alpha : float
        Learning rate.
    states, responses : array-like, optional
        Per-trial codes, indexable so that states[t] is a length-1 integer
        array (the shape loadmat column vectors have).  Default to the
        module-level ``state`` / ``response``.
    rewards : array-like, optional
        Per-trial rewards; defaults to the module-level ``reward``.
    idx : ndarray, optional
        (3, 3) table mapping a (state, response) pair to a row 0..3 of V;
        defaults to the module-level ``index``.

    Returns
    -------
    ndarray of shape (4, n_trials + 1)
        Column t holds the value estimates before trial t; column 0 is zeros.
    """
    if states is None:
        states = state
    if responses is None:
        responses = response
    if rewards is None:
        rewards = reward
    if idx is None:
        idx = index
    n_trials = len(states)

    V = np.zeros((4, n_trials + 1))
    for t in range(n_trials):
        # Carry every value forward, then update only the pair that was taken.
        # (The original also computed the trial-0 index before the loop; that
        # assignment was dead code and has been removed.)
        V[:, t + 1] = V[:, t]
        i = idx[states[t], responses[t]][0]
        V[i, t + 1] += alpha * (rewards[t] - V[i, t])
    return V
In [4]:
V = compute_V(.1)
# One trace per (state, response) pair, rows of V in index order.
# NOTE(review): index maps row 1 to (state=2, response=1) but the legend's
# second entry is 'V_12' -- confirm whether the notation is V_{state,response}
# (in which case entries 2 and 3 should be swapped) or V_{response,state}.
fig, ax = plt.subplots()
ax.plot(V.T)
ax.legend(['V_11', 'V_12', 'V_21', 'V_22'])
ax.set_xlabel('trial')
ax.set_ylabel('learned value')
Out[4]:
It's clear that something changed at about the 70th time step. What happened is that the scientist changed the rule for the second "state": from that point on, responding with the same lever became wrong and the opposite lever correct. The first state shows no such change.
In [5]:
# Rewards (top row) and trial indices (bottom row) for trials where the
# response matched the state and the state was 2.
matched_state2 = np.logical_and(state == response, state == 2)
np.vstack((reward[matched_state2], np.arange(T)[matched_state2.flatten()]))
Out[5]:
In [ ]:
# Rewards on trials where the response matched the state and the state was 1.
matched_state1 = (state == response) & (state == 1)
reward[matched_state1]
Out[ ]:
In [ ]:
alphas = np.arange(0., 1., .01)
betas = np.arange(0., 3., .01)
LL = np.zeros((len(alphas), len(betas)))

# The row of V for the chosen response and the rows for both available
# responses depend only on the data, not on (alpha, beta) -- hoist them out
# of the grid search instead of recomputing them on every grid point.
trials = np.arange(T)
chosen = np.array([index[state[t], response[t]][0] for t in range(T)])   # (T,)
avail = np.array([index[state[t], 1:][0] for t in range(T)])             # (T, 2)

for j, alpha in enumerate(alphas):
    V = compute_V(alpha)
    v_chosen = V[chosen, trials]           # value of the taken action, per trial
    v_avail = V[avail, trials[:, None]]    # values of both actions, per trial
    for k, beta in enumerate(betas):
        # Softmax log-likelihood of the observed choices, summed over trials.
        # logsumexp keeps the normalization term numerically stable, unlike
        # the original log(sum(exp(...))).
        LL[j, k] = np.sum(beta * v_chosen - logsumexp(beta * v_avail, axis=1))
In [ ]:
X, Y = np.meshgrid(betas, alphas)
fig = plt.figure()
# fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and removed in
# 3.6; add_subplot is the supported way to create a 3-D axes.
ax = fig.add_subplot(projection='3d')
ax.plot_surface(X, Y, LL, cmap=cm.coolwarm,
                linewidth=0, antialiased=False)
ax.set_xlabel('beta')
ax.set_ylabel('alpha')
ax.set_zlabel('log-likelihood')

# Set the figure size BEFORE creating the figure -- the original set it
# afterwards, so it never applied to this heatmap.
plt.rcParams['figure.figsize'] = [7.0, 10.0]
plt.figure()
plt.imshow(LL[::-1], extent=(0, 3, 0, 1))
plt.colorbar(orientation='horizontal')
plt.xlabel('beta')
plt.ylabel('alpha')
In [ ]:
# Locate the grid point with the highest log-likelihood and report the
# corresponding (alpha, beta) parameter values.
best = np.unravel_index(np.argmax(LL), LL.shape)
print('The alpha that maximizes the loglikelihood is:', alphas[best[0]])
print('The beta that maximizes the loglikelihood is:', betas[best[1]])
In [ ]: